#!/bin/bash
# Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV or PNG on stdout
#
# Example usage:
# $ seq 100 | Rio -nf sum (same as Rio -ne 'sum(df)')
#
# $ curl -s 'https://raw.githubusercontent.com/pydata/pandas/master/pandas/tests/data/iris.csv' > iris.csv
# $ < iris.csv Rio -e 'df$SepalLength^2'
# $ < iris.csv Rio -f summary
# $ < iris.csv Rio -se 'sqldf("select Name from df where df.SepalLength > 7")'
# $ < iris.csv Rio -ge 'g+geom_point(aes(x=SepalLength,y=SepalWidth,colour=Name))' > iris.png
#
# Dependency: R (optionally with the R packages ggplot2, dplyr, tidyr, sqldf, and ggmap)
#
# Author: http://jeroenjanssens.com

# usage: print the help text (summary line, synopsis, and option list) to stdout.
# Takes no arguments.
usage() {
	# printf re-applies the format to every argument, so each argument below
	# becomes exactly one output line; the empty strings are the blank lines.
	printf '%s\n' \
		'Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV on stdout' \
		'' \
		'usage: Rio OPTIONS' \
		'' \
		'OPTIONS:' \
		'   -d      Delimiter' \
		'   -e      Commands to execute' \
		'   -f      Single command to execute on data.frame' \
		'   -g      Import ggplot2' \
		'   -h      Show this message' \
		'   -m      Import ggmap' \
		'   -n      CSV has no header' \
		'   -r      Import dplyr and tidyr' \
		'   -s      Import sqldf' \
		'   -b      Use same settings as used for book Data Science at the Command Line' \
		'   -v      Verbose' \
		''
}

# finish: EXIT-trap handler that removes Rio's temporary files.
# Globals read: IN, OUT, ERR (any of them may still be empty if the script
# exits before they have been assigned).
# Removes $IN and $OUT, plus the paths obtained by stripping the ".png" suffix
# from $OUT and the ".err" suffix from $ERR. $ERR itself is removed only when
# it is empty or missing; a non-empty error log is left in place. Finally
# removes Rplots.pdf from the current working directory.
finish() {
	local path

	# Every path is quoted so a temp directory containing whitespace or glob
	# characters is still cleaned up, and "--" protects against names that
	# start with a dash.
	for path in "$IN" "$OUT" "${OUT%.png}" "${ERR%.err}"; do
		# Skip names that were never assigned so rm is not handed "".
		if [[ -n "$path" ]]; then
			rm -f -- "$path"
		fi
	done

	# Remove the error log only if it holds nothing (or does not exist).
	if [[ -n "$ERR" && ! -s "$ERR" ]]; then
		rm -f -- "$ERR"
	fi

	# NOTE(review): presumably the file R's default graphics device leaves
	# behind when a plot is drawn without an explicit device — confirm.
	rm -f Rplots.pdf
}

# Run the cleanup on every exit path of the script.
trap finish EXIT

# callR: run the user's R code against the CSV stored in $IN and write the
# result to $OUT.
#
# Globals read (all interpolated verbatim into the R source text):
#   IN, OUT    - paths of the input CSV and of the result file
#   HEADER     - inserted as the R value for read.csv(header=) and for
#                write.table(col.names=)
#   DELIMITER  - inserted as read.csv(sep=)
#   REQUIRES   - R statements placed before the user's code
#   SCRIPT     - the user's R code; `last<-.Last.value` is appended directly
#                after it with no separator, so SCRIPT must end with ';'
#   RIO_DPI    - dpi passed to ggsave; 72 is used when the variable is unset
#
# The generated R program sets options(scipen=999), reads $IN into `df`, runs
# $REQUIRES and $SCRIPT, stores .Last.value in `last`, converts a matrix to a
# data.frame, and then writes `last` to $OUT depending on its type:
#   data.frame -> write.table with sep=',' and double-quote escaping, no row names
#   vector     -> cat(), using the separator written here as '\\\n'
#                 NOTE(review): presumably this ends up as a newline once the
#                 shell and Rscript have processed it — confirm before changing
#   ggplot     -> ggsave, 20 x 15 cm (only checked when is.ggplot exists)
#   otherwise  -> sink($OUT) followed by print(last)
#
# NOTE(review): because the globals are pasted into R source unescaped, a quote
# character in any of them changes the program that R parses.
callR() {
	Rscript --vanilla -e "options(scipen=999);df<-read.csv('${IN}',header=${HEADER},sep='${DELIMITER}',stringsAsFactors=F);${REQUIRES}${SCRIPT}last<-.Last.value;if(is.matrix(last)){last<-as.data.frame(last)};if(is.data.frame(last)){write.table(last,'${OUT}',sep=',',quote=T,qmethod='double',row.names=F,col.names=${HEADER});}else if(is.vector(last)){cat(last,sep='\\\n', file='${OUT}')}else if(exists('is.ggplot')&&is.ggplot(last)){ggsave('${OUT}',last,dpi=${RIO_DPI-72},units='cm',width=20,height=15);}else{sink('${OUT}');print(last);}"
}

# Defaults; the command-line options parsed further down override them.
SCRIPT=          # R code to execute
REQUIRES=        # R statements run before SCRIPT
DELIMITER=","    # field separator of the input CSV
HEADER="T"       # R logical literal: "T" = input has a header row, "F" = it has not
VERBOSE=false    # "true" lets R's own output through instead of capturing it

# OSX `mktemp' requires a temp file template, but Linux `mktemp' has it as optional.
# This explicitly uses a template, which works for both.  The $TMPDIR fallback is in
# case it isn't set as an environment variable; it assumes you have /tmp.
if [ -z "$TMPDIR" ]; then
	TMPDIR=/tmp/
fi

# rio_mktemp: create an empty temporary file under $TMPDIR and print its path.
# Returns mktemp's exit status. The expansion is quoted so a $TMPDIR that
# contains whitespace works, and one trailing slash is dropped so the resulting
# path has no doubled separator.
rio_mktemp() {
	mktemp "${TMPDIR%/}/Rio-XXXXXXXX"
}

# Stop right away if any temp file cannot be created; carrying on with an
# empty path would only fail later in a confusing way.
if ! IN=$(rio_mktemp) || ! OUT=$(rio_mktemp) || ! ERR=$(rio_mktemp); then
	echo "Rio: could not create temporary files in ${TMPDIR}" >&2
	exit 1
fi

# mktemp created the suffix-less files; the names actually used for the result
# and the error log carry an extension and do not exist yet.
OUT="${OUT}.png"
ERR="${ERR}.err"

# parse_options: translate Rio's command-line flags into global settings.
# Arguments: the script's command-line arguments.
# Globals written: RIO_DPI, DELIMITER, SCRIPT, HEADER, REQUIRES, VERBOSE.
# Exits with status 1 after printing the usage text for -h (stdout) and for
# any option without a handler (stderr).
parse_options() {
	local OPTION OPTARG OPTIND=1
	# ERE: a ';' followed by nothing but whitespace up to the end of the string.
	local trailing_semicolon_re=';[[:space:]]*$'

	while getopts "d:hgmnprsve:f:b" OPTION; do
		case "$OPTION" in
			b)
				RIO_DPI=300
				;;
			d)
				DELIMITER=$OPTARG
				;;
			e)
				SCRIPT=$OPTARG
				# More R code is appended straight after SCRIPT, so it has
				# to end with ';'. The check is done inside the shell: the
				# former `echo $SCRIPT | grep` glob-expanded characters such
				# as '*' in the R code and let echo swallow a leading -n/-e.
				if [[ ! "$SCRIPT" =~ $trailing_semicolon_re ]]; then
					SCRIPT="${SCRIPT};"
				fi
				;;
			f)
				# Apply a single function to the whole data.frame.
				SCRIPT="${OPTARG}(df);"
				;;
			h)
				usage
				exit 1
				;;
			g)
				REQUIRES="${REQUIRES}require(ggplot2);g<-ggplot(df);"
				;;
			n)
				HEADER="F"
				;;
			r)
				REQUIRES="${REQUIRES}require(dplyr);require(tidyr);"
				;;
			s)
				REQUIRES="${REQUIRES}require(sqldf);"
				;;
			m)
				REQUIRES="${REQUIRES}require(ggmap);"
				;;
			v)
				VERBOSE=true
				;;
			*)
				# Reached for options getopts rejects (OPTION is '?') and
				# for -p, which the option string accepts but which has no
				# handler. The help goes to stderr with a failing status so
				# it neither pollutes piped output nor looks like success.
				usage >&2
				exit 1
				;;
		esac
	done
}

parse_options "$@"

# Buffer all of stdin into the temp file that the R program reads.
cat > "$IN"

# Run R. Unless -v was given, everything R prints (stdout and stderr) is
# captured in $ERR so that it is shown only when no usable result exists.
if [[ "$VERBOSE" == true ]]; then
	callR
else
	callR > "$ERR" 2>&1
fi

# Load the result for inspection. NUL bytes are mapped to newlines because a
# shell variable cannot hold them (the result may be a binary PNG).
RESULT=
if [[ -f "$OUT" ]]; then
	RESULT="$(tr '\0' '\n' < "$OUT")"
fi

if [[ -f "$OUT" && "$RESULT" != "NULL" ]]; then
	# A real result was written: emit it unchanged.
	cat -- "$OUT"
elif [[ -f "$ERR" ]]; then
	# No result file, or one holding only the text NULL: show what R printed.
	cat -- "$ERR"
else
	# Verbose mode never creates $ERR, and R's output has already gone to the
	# terminal. Previously `cat $ERR` failed here with a "No such file"
	# message; keep the failing status but drop the misleading message.
	exit 1
fi

